FIFA 22 is a football simulation video game published by Electronic Arts as part of the FIFA series. It is the 29th installment in the FIFA series, and was released worldwide on 1 October 2021 for Microsoft Windows, Nintendo Switch, PlayStation 4, PlayStation 5, Xbox One, and Xbox Series X/S.
source: Wikipedia
library(tidyverse)
library(magrittr)
library(DataExplorer)
library(maps)
library(plotly)
library(DT)
library(tidytext)
library(gridExtra)
library(factoextra)
library(kableExtra)
library(splitstackshape)
library(ggthemes)
library(data.table)
library(waffle)
library(knitr)
# Strongly favour fixed over scientific notation when printing numbers.
options(scipen = 999)

# Read the player data set from the project's db folder.
df <- read.csv("../db/players_22.csv", encoding = "UTF-8")

# Preview the first three players, transposed so that every column of the
# data set is shown as a row of the HTML table.
df %>%
  head(3) %>%
  t() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| 1 | 2 | 3 | |
|---|---|---|---|
| sofifa_id | 158023 | 188545 | 20801 |
| player_url | https://sofifa.com/player/158023/lionel-messi/220002 | https://sofifa.com/player/188545/robert-lewandowski/220002 | https://sofifa.com/player/20801/c-ronaldo-dos-santos-aveiro/220002 |
| short_name | L. Messi | R. Lewandowski | Cristiano Ronaldo |
| long_name | Lionel Andrés Messi Cuccittini | Robert Lewandowski | Cristiano Ronaldo dos Santos Aveiro |
| player_positions | RW, ST, CF | ST | ST, LW |
| overall | 93 | 92 | 91 |
| potential | 93 | 92 | 91 |
| value_eur | 78000000 | 119500000 | 45000000 |
| wage_eur | 320000 | 270000 | 270000 |
| age | 34 | 32 | 36 |
| dob | 1987-06-24 | 1988-08-21 | 1985-02-05 |
| height_cm | 170 | 185 | 187 |
| weight_kg | 72 | 81 | 83 |
| club_team_id | 73 | 21 | 11 |
| club_name | Paris Saint-Germain | FC Bayern München | Manchester United |
| league_name | French Ligue 1 | German 1. Bundesliga | English Premier League |
| league_level | 1 | 1 | 1 |
| club_position | RW | ST | ST |
| club_jersey_number | 30 | 9 | 7 |
| club_loaned_from | |||
| club_joined | 2021-08-10 | 2014-07-01 | 2021-08-27 |
| club_contract_valid_until | 2023 | 2023 | 2023 |
| nationality_id | 52 | 37 | 38 |
| nationality_name | Argentina | Poland | Portugal |
| nation_team_id | 1369 | 1353 | 1354 |
| nation_position | RW | RS | ST |
| nation_jersey_number | 10 | 9 | 7 |
| preferred_foot | Left | Right | Right |
| weak_foot | 4 | 4 | 4 |
| skill_moves | 4 | 4 | 5 |
| international_reputation | 5 | 5 | 5 |
| work_rate | Medium/Low | High/Medium | High/Low |
| body_type | Unique | Unique | Unique |
| real_face | Yes | Yes | Yes |
| release_clause_eur | 144300000 | 197200000 | 83300000 |
| player_tags | #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Distance Shooter, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Dribbler, #Distance Shooter, #Crosser, #Acrobat, #Clinical Finisher, #Complete Forward |
| player_traits | Finesse Shot, Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Chip Shot (AI), Technical Dribbler (AI) | Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI) | Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot |
| pace | 85 | 78 | 87 |
| shooting | 92 | 92 | 94 |
| passing | 91 | 79 | 80 |
| dribbling | 95 | 86 | 88 |
| defending | 34 | 44 | 34 |
| physic | 65 | 82 | 75 |
| attacking_crossing | 85 | 71 | 87 |
| attacking_finishing | 95 | 95 | 95 |
| attacking_heading_accuracy | 70 | 90 | 90 |
| attacking_short_passing | 91 | 85 | 80 |
| attacking_volleys | 88 | 89 | 86 |
| skill_dribbling | 96 | 85 | 88 |
| skill_curve | 93 | 79 | 81 |
| skill_fk_accuracy | 94 | 85 | 84 |
| skill_long_passing | 91 | 70 | 77 |
| skill_ball_control | 96 | 88 | 88 |
| movement_acceleration | 91 | 77 | 85 |
| movement_sprint_speed | 80 | 79 | 88 |
| movement_agility | 91 | 77 | 86 |
| movement_reactions | 94 | 93 | 94 |
| movement_balance | 95 | 82 | 74 |
| power_shot_power | 86 | 90 | 94 |
| power_jumping | 68 | 85 | 95 |
| power_stamina | 72 | 76 | 77 |
| power_strength | 69 | 86 | 77 |
| power_long_shots | 94 | 87 | 93 |
| mentality_aggression | 44 | 81 | 63 |
| mentality_interceptions | 40 | 49 | 29 |
| mentality_positioning | 93 | 95 | 95 |
| mentality_vision | 95 | 81 | 76 |
| mentality_penalties | 75 | 90 | 88 |
| mentality_composure | 96 | 88 | 95 |
| defending_marking_awareness | 20 | 35 | 24 |
| defending_standing_tackle | 35 | 42 | 32 |
| defending_sliding_tackle | 24 | 19 | 24 |
| goalkeeping_diving | 6 | 15 | 7 |
| goalkeeping_handling | 11 | 6 | 11 |
| goalkeeping_kicking | 15 | 12 | 15 |
| goalkeeping_positioning | 14 | 8 | 14 |
| goalkeeping_reflexes | 8 | 10 | 11 |
| goalkeeping_speed | NA | NA | NA |
| ls | 89+3 | 90+2 | 90+1 |
| st | 89+3 | 90+2 | 90+1 |
| rs | 89+3 | 90+2 | 90+1 |
| lw | 92 | 85 | 88 |
| lf | 93 | 88 | 89 |
| cf | 93 | 88 | 89 |
| rf | 93 | 88 | 89 |
| rw | 92 | 85 | 88 |
| lam | 93 | 86+3 | 86+3 |
| cam | 93 | 86+3 | 86+3 |
| ram | 93 | 86+3 | 86+3 |
| lm | 91+2 | 84+3 | 86+3 |
| lcm | 87+3 | 80+3 | 78+3 |
| cm | 87+3 | 80+3 | 78+3 |
| rcm | 87+3 | 80+3 | 78+3 |
| rm | 91+2 | 84+3 | 86+3 |
| lwb | 66+3 | 64+3 | 63+3 |
| ldm | 64+3 | 66+3 | 59+3 |
| cdm | 64+3 | 66+3 | 59+3 |
| rdm | 64+3 | 66+3 | 59+3 |
| rwb | 66+3 | 64+3 | 63+3 |
| lb | 61+3 | 61+3 | 60+3 |
| lcb | 50+3 | 60+3 | 53+3 |
| cb | 50+3 | 60+3 | 53+3 |
| rcb | 50+3 | 60+3 | 53+3 |
| rb | 61+3 | 61+3 | 60+3 |
| gk | 19+3 | 19+3 | 20+3 |
| player_face_url | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/players/020/801/22_120.png |
| club_logo_url | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/teams/11/60.png |
| club_flag_url | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/flags/gb-eng.png |
| nation_logo_url | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/teams/1354/60.png |
| nation_flag_url | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/pl.png | https://cdn.sofifa.net/flags/pt.png |
There are 110 columns and 19239 observations in the data.
# Number of rows (observations) followed by number of columns.
c(nrow(df), ncol(df))
## [1] 19239 110
Now we are going to check some more details about the data with the DataExplorer package.
# Summarise the data structure with introduce() and render it as a
# scrollable HTML table.
df %>%
  introduce() %>%
  kable("html") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  scroll_box(width = "100%")
| rows | columns | discrete_columns | continuous_columns | all_missing_columns | total_missing_values | complete_rows | total_observations | memory_usage |
|---|---|---|---|---|---|---|---|---|
| 19239 | 110 | 50 | 60 | 0 | 68414 | 0 | 2116290 | 20813248 |
# Plot the structure summary with plot_intro().
plot_intro(
  data = df,
  title = "Data Structure of database"
)

# Plot the share of missing values, restricted to columns that have any.
plot_missing(
  data = df,
  missing_only = TRUE,
  title = "Percentage of Missing Features"
)
First, we need to deal with the missing features in our dataset.
Free agents do not play for any club. Therefore, some of their features (club_contract_valid_until, club_jersey_number, league_level, club_team_id, wage_eur, value_eur) will be null.
# Free agents have no club, so all of their club-related fields are expected
# to be missing. Keep the rows where every one of those fields is NA and list
# the distinct club names found among them.
df %>%
  filter(
    is.na(club_team_id),
    is.na(club_jersey_number),
    is.na(league_level),
    # This check replaces a duplicated is.na(club_team_id) test, so the
    # contract column named in the accompanying text is actually verified.
    is.na(club_contract_valid_until),
    is.na(wage_eur),
    is.na(value_eur)
  ) %>%
  pull(club_name) %>%
  unique()
## [1] ""
However, we can see from the graph above that there are some players who are not free agents but still have no value. These are older players, so we can also assume that their value is equal to 0.
# Show the first rows of players who have a club (non-empty club_name) but
# no market value.
df %>%
  filter(is.na(value_eur), club_name != "") %>%
  head() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| sofifa_id | player_url | short_name | long_name | player_positions | overall | potential | value_eur | wage_eur | age | dob | height_cm | weight_kg | club_team_id | club_name | league_name | league_level | club_position | club_jersey_number | club_loaned_from | club_joined | club_contract_valid_until | nationality_id | nationality_name | nation_team_id | nation_position | nation_jersey_number | preferred_foot | weak_foot | skill_moves | international_reputation | work_rate | body_type | real_face | release_clause_eur | player_tags | player_traits | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | goalkeeping_speed | ls | st | rs | lw | lf | cf | rf | rw | lam | cam | ram | lm | lcm | cm | rcm | rm | lwb | ldm | cdm | rdm | rwb | lb | lcb | cb | rcb | rb | gk | player_face_url | club_logo_url | club_flag_url | nation_logo_url | nation_flag_url |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 138703 | https://sofifa.com/player/138703/andres-dalessandro/220002 | A. D’Alessandro | Andrés Nicolás D’Alessandro | CAM, RM | 75 | 75 | NA | 900 | 40 | 1981-04-15 | 174 | 69 | 111325 | Club Nacional de Football | Uruguayan Primera División | 1 | SUB | 10 | 2021-01-04 | 2021 | 52 | Argentina | NA | NA | Left | 3 | 4 | 1 | Medium/Low | Normal (170-185) | No | NA | Flair, Long Passer (AI), Playmaker (AI), Technical Dribbler (AI) | 42 | 71 | 80 | 75 | 38 | 50 | 79 | 69 | 45 | 80 | 68 | 77 | 82 | 80 | 77 | 80 | 45 | 39 | 51 | 73 | 82 | 69 | 45 | 30 | 50 | 76 | 75 | 42 | 73 | 85 | 80 | 78 | 46 | 29 | 25 | 6 | 11 | 12 | 15 | 6 | NA | 66+2 | 66+2 | 66+2 | 72 | 72 | 72 | 72 | 72 | 75 | 75 | 75 | 71+2 | 71+2 | 71+2 | 71+2 | 71+2 | 53+2 | 58+2 | 58+2 | 58+2 | 53+2 | 49+2 | 47+2 | 47+2 | 47+2 | 49+2 | 17+2 | https://cdn.sofifa.net/players/138/703/22_120.png | https://cdn.sofifa.net/teams/111325/60.png | https://cdn.sofifa.net/flags/uy.png | https://cdn.sofifa.net/flags/ar.png | ||||
| 152912 | https://sofifa.com/player/152912/jose-sand/220002 | J. Sand | José Gustavo Sand | ST | 75 | 75 | NA | 13000 | 40 | 1980-07-17 | 182 | 79 | 110395 | Club Atlético Lanús | Argentina Primera División | 1 | LS | 9 | 2019-01-21 | 2022 | 52 | Argentina | NA | NA | Right | 4 | 3 | 1 | High/Low | Normal (170-185) | No | NA | Solid Player, Finesse Shot, Team Player | 51 | 79 | 62 | 68 | 37 | 70 | 53 | 81 | 77 | 69 | 77 | 67 | 63 | 63 | 46 | 75 | 49 | 53 | 38 | 81 | 67 | 77 | 71 | 41 | 83 | 73 | 73 | 33 | 83 | 69 | 85 | 85 | 45 | 21 | 25 | 11 | 16 | 15 | 9 | 14 | NA | 75 | 75 | 75 | 68 | 73 | 73 | 73 | 68 | 70+2 | 70+2 | 70+2 | 66+2 | 63+2 | 63+2 | 63+2 | 66+2 | 49+2 | 52+2 | 52+2 | 52+2 | 49+2 | 47+2 | 51+2 | 51+2 | 51+2 | 47+2 | 20+2 | https://cdn.sofifa.net/players/152/912/22_120.png | https://cdn.sofifa.net/teams/110395/60.png | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/ar.png | ||||
| 110381 | https://sofifa.com/player/110381/maximiliano-rodriguez/220002 | M. Rodríguez | Maximiliano Rubén Rodríguez | LW, LM, ST | 73 | 73 | NA | 10000 | 40 | 1981-01-02 | 173 | 74 | 110396 | Newell’s Old Boys | Argentina Primera División | 1 | SUB | 11 | 2019-01-21 | 2021 | 52 | Argentina | NA | NA | Right | 4 | 4 | 3 | Medium/Low | Normal (170-185) | No | NA | Leadership, Team Player | 64 | 74 | 76 | 75 | 44 | 64 | 72 | 73 | 61 | 78 | 71 | 75 | 76 | 71 | 75 | 76 | 64 | 64 | 72 | 72 | 70 | 77 | 67 | 46 | 70 | 75 | 73 | 47 | 76 | 76 | 77 | 86 | 57 | 30 | 26 | 15 | 11 | 15 | 9 | 9 | NA | 72+1 | 72+1 | 72+1 | 73 | 74-1 | 74-1 | 74-1 | 73 | 75-2 | 75-2 | 75-2 | 72+1 | 70+3 | 70+3 | 70+3 | 72+1 | 57+3 | 60+3 | 60+3 | 60+3 | 57+3 | 54+3 | 54+3 | 54+3 | 54+3 | 54+3 | 18+3 | https://cdn.sofifa.net/players/110/381/22_120.png | https://cdn.sofifa.net/teams/110396/60.png | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/ar.png | ||||
| 115909 | https://sofifa.com/player/115909/ruben-castro-martin/220002 | Rubén Castro | Rubén Castro Martín | ST | 69 | 69 | NA | 3000 | 40 | 1981-06-27 | 169 | 68 | 100851 | FC Cartagena | Spanish Segunda División | 2 | ST | 7 | 2020-09-12 | 2022 | 45 | Spain | NA | NA | Right | 3 | 3 | 2 | Medium/Low | Normal (170-) | No | NA | 70 | 70 | 61 | 72 | 29 | 61 | 60 | 71 | 62 | 60 | 66 | 70 | 68 | 67 | 45 | 69 | 72 | 69 | 86 | 75 | 81 | 69 | 85 | 62 | 61 | 68 | 54 | 37 | 77 | 74 | 75 | 68 | 36 | 12 | 14 | 14 | 8 | 15 | 8 | 12 | NA | 69 | 69 | 69 | 70-1 | 71-2 | 71-2 | 71-2 | 70-1 | 69 | 69 | 69 | 67+2 | 61+2 | 61+2 | 61+2 | 67+2 | 50+2 | 47+2 | 47+2 | 47+2 | 50+2 | 47+2 | 43+2 | 43+2 | 43+2 | 47+2 | 18+2 | https://cdn.sofifa.net/players/115/909/22_120.png | https://cdn.sofifa.net/teams/100851/60.png | https://cdn.sofifa.net/flags/es.png | https://cdn.sofifa.net/flags/es.png | |||||
| 153066 | https://sofifa.com/player/153066/lucas-licht/220002 | L. Licht | Lucas Matías Licht | LB, LWB, LM | 69 | 69 | NA | 5000 | 40 | 1981-04-06 | 174 | 72 | 101084 | Gimnasia y Esgrima La Plata | Argentina Primera División | 1 | SUB | 25 | 2012-07-21 | 2021 | 52 | Argentina | NA | NA | Left | 4 | 3 | 1 | Medium/Medium | Normal (170-185) | No | NA | Leadership, Early Crosser, Team Player | 64 | 61 | 65 | 72 | 68 | 66 | 80 | 52 | 53 | 52 | 57 | 76 | 73 | 68 | 71 | 69 | 59 | 68 | 69 | 66 | 73 | 71 | 72 | 64 | 66 | 68 | 69 | 71 | 56 | 64 | 84 | 72 | 69 | 71 | 69 | 8 | 14 | 6 | 15 | 8 | NA | 61+2 | 61+2 | 61+2 | 65 | 63 | 63 | 63 | 65 | 64+2 | 64+2 | 64+2 | 66+2 | 65+2 | 65+2 | 65+2 | 66+2 | 68+1 | 67+2 | 67+2 | 67+2 | 68+1 | 67+2 | 67+2 | 67+2 | 67+2 | 67+2 | 17+2 | https://cdn.sofifa.net/players/153/066/22_120.png | https://cdn.sofifa.net/teams/101084/60.png | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/ar.png | ||||
| 124344 | https://sofifa.com/player/124344/ibrahim-ozturk/220002 | İ. Öztürk | İbrahim Öztürk | CB | 67 | 67 | NA | 3000 | 40 | 1981-06-21 | 186 | 80 | 101006 | Altay SK | Turkish Süper Lig | 1 | CB | 38 | 2017-08-24 | 2022 | 48 | Turkey | NA | NA | Right | 2 | 2 | 1 | Low/High | Normal (185+) | No | NA | Dives Into Tackles (AI), Team Player | 32 | 43 | 48 | 48 | 68 | 74 | 44 | 30 | 72 | 56 | 52 | 46 | 44 | 48 | 42 | 54 | 30 | 34 | 32 | 68 | 38 | 66 | 59 | 65 | 78 | 46 | 78 | 66 | 34 | 44 | 51 | 51 | 70 | 68 | 62 | 15 | 13 | 10 | 10 | 6 | NA | 50+2 | 50+2 | 50+2 | 44 | 47 | 47 | 47 | 44 | 46+2 | 46+2 | 46+2 | 46+2 | 52+2 | 52+2 | 52+2 | 46+2 | 57+2 | 62+2 | 62+2 | 62+2 | 57+2 | 59+2 | 67 | 67 | 67 | 59+2 | 17+2 | https://cdn.sofifa.net/players/124/344/22_120.png | https://cdn.sofifa.net/teams/101006/60.png | https://cdn.sofifa.net/flags/tr.png | https://cdn.sofifa.net/flags/tr.png |
Fill the missing values
# Replace missing club-related values.
# - Contract year, jersey number, wage and value become 0.
# - club_team_id runs from 1 to 115820, so 0 is free to mark free agents.
# - league_level is ordinal (1 = highest league, 5 = lowest); free agents are
#   not playing in any league at the moment, so they get level 6.
df <- df %>%
  mutate(
    across(
      c(
        club_contract_valid_until, club_jersey_number,
        club_team_id, wage_eur, value_eur
      ),
      ~ replace(.x, is.na(.x), 0)
    ),
    league_level = replace(league_level, is.na(league_level), 6)
  )

# Re-draw the missing-value chart to see which columns still have gaps.
plot_missing(
  data = df,
  missing_only = TRUE,
  title = "Percentage of Missing Features"
)
Not all players have a release clause, so it is natural for some release_clause_eur values to be missing.
# Show the first rows of players whose release clause is missing.
df %>%
  filter(is.na(release_clause_eur)) %>%
  head() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| sofifa_id | player_url | short_name | long_name | player_positions | overall | potential | value_eur | wage_eur | age | dob | height_cm | weight_kg | club_team_id | club_name | league_name | league_level | club_position | club_jersey_number | club_loaned_from | club_joined | club_contract_valid_until | nationality_id | nationality_name | nation_team_id | nation_position | nation_jersey_number | preferred_foot | weak_foot | skill_moves | international_reputation | work_rate | body_type | real_face | release_clause_eur | player_tags | player_traits | pace | shooting | passing | dribbling | defending | physic | attacking_crossing | attacking_finishing | attacking_heading_accuracy | attacking_short_passing | attacking_volleys | skill_dribbling | skill_curve | skill_fk_accuracy | skill_long_passing | skill_ball_control | movement_acceleration | movement_sprint_speed | movement_agility | movement_reactions | movement_balance | power_shot_power | power_jumping | power_stamina | power_strength | power_long_shots | mentality_aggression | mentality_interceptions | mentality_positioning | mentality_vision | mentality_penalties | mentality_composure | defending_marking_awareness | defending_standing_tackle | defending_sliding_tackle | goalkeeping_diving | goalkeeping_handling | goalkeeping_kicking | goalkeeping_positioning | goalkeeping_reflexes | goalkeeping_speed | ls | st | rs | lw | lf | cf | rf | rw | lam | cam | ram | lm | lcm | cm | rcm | rm | lwb | ldm | cdm | rdm | rwb | lb | lcb | cb | rcb | rb | gk | player_face_url | club_logo_url | club_flag_url | nation_logo_url | nation_flag_url |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 194765 | https://sofifa.com/player/194765/antoine-griezmann/220002 | A. Griezmann | Antoine Griezmann | ST, LW, RW | 85 | 85 | 53000000 | 220000 | 30 | 1991-03-21 | 176 | 73 | 240 | Atlético de Madrid | Spain Primera Division | 1 | LS | 17 | FC Barcelona | 2022 | 18 | France | 1335 | RW | 7 | Left | 3 | 4 | 4 | Medium/Medium | Unique | Yes | NA | #Acrobat | Finesse Shot, Flair, Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot, Chip Shot (AI), Technical Dribbler (AI) | 80 | 84 | 84 | 87 | 52 | 72 | 83 | 84 | 83 | 84 | 86 | 85 | 86 | 85 | 82 | 89 | 80 | 80 | 92 | 89 | 83 | 82 | 90 | 86 | 63 | 83 | 73 | 49 | 89 | 85 | 79 | 90 | 43 | 54 | 49 | 14 | 8 | 14 | 13 | 14 | NA | 84+1 | 84+1 | 84+1 | 85 | 86-1 | 86-1 | 86-1 | 85 | 86-1 | 86-1 | 86-1 | 85 | 82+3 | 82+3 | 82+3 | 85 | 71+3 | 69+3 | 69+3 | 69+3 | 71+3 | 69+3 | 63+3 | 63+3 | 63+3 | 69+3 | 21+3 | https://cdn.sofifa.net/players/194/765/22_120.png | https://cdn.sofifa.net/teams/240/60.png | https://cdn.sofifa.net/flags/es.png | https://cdn.sofifa.net/teams/1335/60.png | https://cdn.sofifa.net/flags/fr.png | |
| 184087 | https://sofifa.com/player/184087/toby-alderweireld/220002 | T. Alderweireld | Toby Alderweireld | CB | 83 | 83 | 0 | 0 | 32 | 1989-03-02 | 186 | 81 | 0 | 6 | 0 | 0 | 7 | Belgium | 1325 | RCB | 2 | Right | 3 | 2 | 3 | Medium/Medium | Normal (185+) | Yes | NA | Long Passer (AI) | 58 | 55 | 70 | 67 | 86 | 77 | 64 | 45 | 81 | 77 | 38 | 62 | 63 | 59 | 81 | 75 | 55 | 60 | 54 | 85 | 62 | 78 | 81 | 76 | 77 | 58 | 79 | 85 | 52 | 62 | 58 | 86 | 87 | 87 | 84 | 16 | 6 | 14 | 16 | 14 | NA | 65+3 | 65+3 | 65+3 | 63 | 65 | 65 | 65 | 63 | 66+3 | 66+3 | 66+3 | 66+3 | 73+3 | 73+3 | 73+3 | 66+3 | 77+3 | 81+2 | 81+2 | 81+2 | 77+3 | 78+3 | 82+1 | 82+1 | 82+1 | 78+3 | 21+3 | https://cdn.sofifa.net/players/184/087/22_120.png | https://cdn.sofifa.net/teams/1325/60.png | https://cdn.sofifa.net/flags/be.png | ||||||||
| 201153 | https://sofifa.com/player/201153/alvaro-borja-morata-martin/220002 | Morata | Álvaro Borja Morata Martín | ST | 83 | 83 | 37000000 | 84000 | 28 | 1992-10-23 | 190 | 84 | 45 | Juventus | Italian Serie A | 1 | ST | 9 | Atlético de Madrid | 2022 | 45 | Spain | 1362 | SUB | 7 | Right | 4 | 3 | 3 | Medium/Medium | Unique | Yes | NA | Speed Dribbler (AI) | 82 | 80 | 72 | 81 | 31 | 77 | 72 | 84 | 86 | 78 | 80 | 83 | 78 | 44 | 60 | 83 | 79 | 85 | 72 | 80 | 63 | 80 | 84 | 77 | 80 | 72 | 69 | 24 | 87 | 77 | 75 | 79 | 37 | 14 | 20 | 4 | 5 | 4 | 4 | 5 | NA | 83 | 83 | 83 | 81 | 82 | 82 | 82 | 81 | 80+3 | 80+3 | 80+3 | 79+3 | 71+3 | 71+3 | 71+3 | 79+3 | 58+3 | 55+3 | 55+3 | 55+3 | 58+3 | 54+3 | 50+3 | 50+3 | 50+3 | 54+3 | 13+3 | https://cdn.sofifa.net/players/201/153/22_120.png | https://cdn.sofifa.net/teams/45/60.png | https://cdn.sofifa.net/flags/it.png | https://cdn.sofifa.net/teams/1362/60.png | https://cdn.sofifa.net/flags/es.png | ||
| 235805 | https://sofifa.com/player/235805/federico-chiesa/220002 | F. Chiesa | Federico Chiesa | RW, LW, RM | 83 | 91 | 80500000 | 74000 | 23 | 1997-10-25 | 175 | 70 | 45 | Juventus | Italian Serie A | 1 | LM | 22 | Fiorentina | 2022 | 27 | Italy | 1343 | RW | 14 | Right | 4 | 4 | 3 | High/Medium | Normal (170-185) | Yes | NA | #Speedster, #Dribbler, #Acrobat | Long Shot Taker (AI), Speed Dribbler (AI) | 91 | 81 | 74 | 85 | 48 | 73 | 73 | 79 | 50 | 78 | 80 | 89 | 78 | 52 | 72 | 82 | 91 | 91 | 87 | 83 | 81 | 86 | 53 | 85 | 71 | 84 | 69 | 30 | 81 | 75 | 62 | 78 | 65 | 44 | 44 | 6 | 7 | 8 | 9 | 7 | NA | 79+3 | 79+3 | 79+3 | 83 | 82 | 82 | 82 | 83 | 82+3 | 82+3 | 82+3 | 82+3 | 75+3 | 75+3 | 75+3 | 82+3 | 67+3 | 64+3 | 64+3 | 64+3 | 67+3 | 64+3 | 57+3 | 57+3 | 57+3 | 64+3 | 16+3 | https://cdn.sofifa.net/players/235/805/22_120.png | https://cdn.sofifa.net/teams/45/60.png | https://cdn.sofifa.net/flags/it.png | https://cdn.sofifa.net/teams/1343/60.png | https://cdn.sofifa.net/flags/it.png | |
| 180206 | https://sofifa.com/player/180206/miralem-pjanic/220002 | M. Pjanić | Miralem Pjanić | CM | 82 | 82 | 25000000 | 155000 | 31 | 1990-04-02 | 178 | 72 | 327 | Beşiktaş JK | Turkish Süper Lig | 1 | RCM | 15 | FC Barcelona | 2022 | 8 | Bosnia and Herzegovina | NA | NA | Right | 4 | 3 | 3 | Medium/Medium | Normal (170-185) | Yes | NA | #FK Specialist | Finesse Shot, Playmaker (AI), Outside Foot Shot, Technical Dribbler (AI) | 65 | 68 | 83 | 81 | 75 | 67 | 80 | 56 | 60 | 84 | 72 | 81 | 86 | 92 | 82 | 83 | 67 | 64 | 74 | 83 | 80 | 78 | 59 | 79 | 60 | 80 | 70 | 78 | 68 | 84 | 79 | 84 | 78 | 77 | 71 | 7 | 7 | 13 | 7 | 8 | NA | 70+3 | 70+3 | 70+3 | 76 | 75 | 75 | 75 | 76 | 78+3 | 78+3 | 78+3 | 77+3 | 81+1 | 81+1 | 81+1 | 77+3 | 78+3 | 79+3 | 79+3 | 79+3 | 78+3 | 76+3 | 73+3 | 73+3 | 73+3 | 76+3 | 16+3 | https://cdn.sofifa.net/players/180/206/22_120.png | https://cdn.sofifa.net/teams/327/60.png | https://cdn.sofifa.net/flags/tr.png | https://cdn.sofifa.net/flags/ba.png | |||
| 193105 | https://sofifa.com/player/193105/alphonse-areola/220002 | A. Areola | Alphonse Areola | GK | 82 | 84 | 26000000 | 75000 | 28 | 1993-02-27 | 195 | 94 | 19 | West Ham United | English Premier League | 1 | SUB | 13 | Paris Saint-Germain | 2022 | 18 | France | NA | NA | Right | 3 | 1 | 2 | Medium/Medium | Unique | Yes | NA | Comes For Crosses | NA | NA | NA | NA | NA | NA | 20 | 19 | 14 | 48 | 16 | 15 | 16 | 16 | 37 | 22 | 56 | 54 | 58 | 78 | 58 | 57 | 72 | 38 | 80 | 14 | 26 | 23 | 17 | 51 | 25 | 64 | 13 | 18 | 12 | 85 | 79 | 76 | 80 | 85 | 55 | 34+2 | 34+2 | 34+2 | 33 | 34 | 34 | 34 | 33 | 36+2 | 36+2 | 36+2 | 35+2 | 36+2 | 36+2 | 36+2 | 35+2 | 32+2 | 34+2 | 34+2 | 34+2 | 32+2 | 31+2 | 31+2 | 31+2 | 31+2 | 31+2 | 81+2 | https://cdn.sofifa.net/players/193/105/22_120.png | https://cdn.sofifa.net/teams/19/60.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/flags/fr.png |
The players with a missing release_clause_eur are players who do not have a release clause, so we set it to 0.
# A missing release clause is stored as 0.
df$release_clause_eur <- replace(
  df$release_clause_eur,
  is.na(df$release_clause_eur),
  0
)

# Re-draw the missing-value chart for the columns that still have gaps.
plot_missing(
  data = df,
  missing_only = TRUE,
  title = "Percentage of Missing Features"
)
From the data we can see that 11.08% of players are missing basic features such as physic, defending, dribbling, passing, shooting, and pace. On the other hand, 88.92% of players are missing goalkeeping_speed. If we combine these two numbers we get 100%. Therefore, there might be two groups of players: goalkeepers and non-goalkeepers.
# Non-goalkeeper check: list the distinct club positions of every player
# whose goalkeeping_speed is missing.
df %>%
  filter(is.na(goalkeeping_speed)) %>%
  pull(club_position) %>%
  unique()
## [1] "RW" "ST" "LW" "RCM" "CF" "CDM" "LCB" "RDM" "RS" "LCM" "SUB" "CAM"
## [13] "RCB" "LDM" "LB" "RB" "LM" "RM" "LS" "CB" "RES" "" "RWB" "RF"
## [25] "CM" "LWB" "LAM" "LF" "RAM"
As expected, this group of players are non-goalkeepers. We can fill goalkeeping_speed with 0 since it is not related to their position.
# Players without a goalkeeping_speed rating get 0.
df$goalkeeping_speed <- replace(
  df$goalkeeping_speed,
  is.na(df$goalkeeping_speed),
  0
)

# Goalkeeper check: list the distinct club positions of players for whom all
# six basic ratings are missing.
df %>%
  filter(
    is.na(physic),
    is.na(defending),
    is.na(dribbling),
    is.na(passing),
    is.na(shooting),
    is.na(pace)
  ) %>%
  pull(club_position) %>%
  unique()
## [1] "GK" "SUB" "RES" ""
Apparently this group does not contain only GK club positions, so we need to keep investigating.
# Among players missing all six basic ratings, look only at those whose club
# position is SUB, RES or empty, and list their distinct playing positions.
df %>%
  filter(
    is.na(physic),
    is.na(defending),
    is.na(dribbling),
    is.na(passing),
    is.na(shooting),
    is.na(pace),
    club_position %in% c("SUB", "RES", "")
  ) %>%
  pull(player_positions) %>%
  unique()
## [1] "GK"
After more investigation, the players in the second group with club_position equal to SUB, RES, or "" are also goalkeepers, so we can fill these features with 0.
# Missing values in the six basic ratings are replaced with 0.
df <- df %>%
  mutate(
    across(
      c(physic, defending, dribbling, passing, shooting, pace),
      ~ replace(.x, is.na(.x), 0)
    )
  )

# Re-draw the missing-value chart for the columns that still have gaps.
plot_missing(
  data = df,
  missing_only = TRUE,
  title = "Percentage of Missing Features"
)
nation_jersey_number and nation_team_id are missing for the majority of players, namely those who are not selected for their national team. We will fill these with 0.
# Missing national-team jersey numbers and team ids are stored as 0.
df$nation_jersey_number <- replace(
  df$nation_jersey_number,
  is.na(df$nation_jersey_number),
  0
)
df$nation_team_id <- replace(
  df$nation_team_id,
  is.na(df$nation_team_id),
  0
)

# Plot the overall data-structure summary again after the fills.
plot_intro(
  data = df,
  title = "Data Structure of database"
)
With this, we have dealt with all missing values and reached tidy data.
# Persist the data frame as an .Rdata file in the output folder, then list
# that folder's contents.
save(
  list = "df",
  file = "../output/fifa_22_tidydata_raw.Rdata"
)
list.files("../output")
## [1] "FIFA_22_Analysis_files" "FIFA_22_Analysis.html"
## [3] "fifa_22_tidydata_cleaned.Rdata" "fifa_22_tidydata_raw.Rdata"
# Drop the player URL, long name, real_face flag and the position-named
# columns (ls ... gk) from the data frame.
df <- df %>%
  select(
    -c(
      player_url, long_name, real_face,
      ls, st, rs, lw, lf, cf, rf, rw,
      lam, cam, ram, lm, lcm, cm, rcm, rm,
      lwb, ldm, cdm, rdm, rwb,
      lb, lcb, cb, rcb, rb, gk
    )
  )

# Preview the first six players, transposed so each remaining column is a row.
df %>%
  head() %>%
  t() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| 1 | 2 | 3 | 4 | 5 | 6 | |
|---|---|---|---|---|---|---|
| sofifa_id | 158023 | 188545 | 20801 | 190871 | 192985 | 200389 |
| short_name | L. Messi | R. Lewandowski | Cristiano Ronaldo | Neymar Jr | K. De Bruyne | J. Oblak |
| player_positions | RW, ST, CF | ST | ST, LW | LW, CAM | CM, CAM | GK |
| overall | 93 | 92 | 91 | 91 | 91 | 91 |
| potential | 93 | 92 | 91 | 91 | 91 | 93 |
| value_eur | 78000000 | 119500000 | 45000000 | 129000000 | 125500000 | 112000000 |
| wage_eur | 320000 | 270000 | 270000 | 270000 | 350000 | 130000 |
| age | 34 | 32 | 36 | 29 | 30 | 28 |
| dob | 1987-06-24 | 1988-08-21 | 1985-02-05 | 1992-02-05 | 1991-06-28 | 1993-01-07 |
| height_cm | 170 | 185 | 187 | 175 | 181 | 188 |
| weight_kg | 72 | 81 | 83 | 68 | 70 | 87 |
| club_team_id | 73 | 21 | 11 | 73 | 10 | 240 |
| club_name | Paris Saint-Germain | FC Bayern München | Manchester United | Paris Saint-Germain | Manchester City | Atlético de Madrid |
| league_name | French Ligue 1 | German 1. Bundesliga | English Premier League | French Ligue 1 | English Premier League | Spain Primera Division |
| league_level | 1 | 1 | 1 | 1 | 1 | 1 |
| club_position | RW | ST | ST | LW | RCM | GK |
| club_jersey_number | 30 | 9 | 7 | 10 | 17 | 13 |
| club_loaned_from | ||||||
| club_joined | 2021-08-10 | 2014-07-01 | 2021-08-27 | 2017-08-03 | 2015-08-30 | 2014-07-16 |
| club_contract_valid_until | 2023 | 2023 | 2023 | 2025 | 2025 | 2023 |
| nationality_id | 52 | 37 | 38 | 54 | 7 | 44 |
| nationality_name | Argentina | Poland | Portugal | Brazil | Belgium | Slovenia |
| nation_team_id | 1369 | 1353 | 1354 | 0 | 1325 | 0 |
| nation_position | RW | RS | ST | RCM | ||
| nation_jersey_number | 10 | 9 | 7 | 0 | 7 | 0 |
| preferred_foot | Left | Right | Right | Right | Right | Right |
| weak_foot | 4 | 4 | 4 | 5 | 5 | 3 |
| skill_moves | 4 | 4 | 5 | 5 | 4 | 1 |
| international_reputation | 5 | 5 | 5 | 5 | 4 | 5 |
| work_rate | Medium/Low | High/Medium | High/Low | High/Medium | High/High | Medium/Medium |
| body_type | Unique | Unique | Unique | Unique | Unique | Unique |
| release_clause_eur | 144300000 | 197200000 | 83300000 | 238700000 | 232200000 | 238000000 |
| player_tags | #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Distance Shooter, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Dribbler, #Distance Shooter, #Crosser, #Acrobat, #Clinical Finisher, #Complete Forward | #Speedster, #Dribbler, #Playmaker, #FK Specialist, #Acrobat, #Complete Midfielder | #Dribbler, #Playmaker, #Engine, #Distance Shooter, #Crosser, #Complete Midfielder | |
| player_traits | Finesse Shot, Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Chip Shot (AI), Technical Dribbler (AI) | Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI) | Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot | Injury Prone, Flair, Speed Dribbler (AI), Playmaker (AI), Outside Foot Shot, Technical Dribbler (AI) | Injury Prone, Leadership, Early Crosser, Long Passer (AI), Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot | GK Long Throw, Comes For Crosses |
| pace | 85 | 78 | 87 | 91 | 76 | 0 |
| shooting | 92 | 92 | 94 | 83 | 86 | 0 |
| passing | 91 | 79 | 80 | 86 | 93 | 0 |
| dribbling | 95 | 86 | 88 | 94 | 88 | 0 |
| defending | 34 | 44 | 34 | 37 | 64 | 0 |
| physic | 65 | 82 | 75 | 63 | 78 | 0 |
| attacking_crossing | 85 | 71 | 87 | 85 | 94 | 13 |
| attacking_finishing | 95 | 95 | 95 | 83 | 82 | 11 |
| attacking_heading_accuracy | 70 | 90 | 90 | 63 | 55 | 15 |
| attacking_short_passing | 91 | 85 | 80 | 86 | 94 | 43 |
| attacking_volleys | 88 | 89 | 86 | 86 | 82 | 13 |
| skill_dribbling | 96 | 85 | 88 | 95 | 88 | 12 |
| skill_curve | 93 | 79 | 81 | 88 | 85 | 13 |
| skill_fk_accuracy | 94 | 85 | 84 | 87 | 83 | 14 |
| skill_long_passing | 91 | 70 | 77 | 81 | 93 | 40 |
| skill_ball_control | 96 | 88 | 88 | 95 | 91 | 30 |
| movement_acceleration | 91 | 77 | 85 | 93 | 76 | 43 |
| movement_sprint_speed | 80 | 79 | 88 | 89 | 76 | 60 |
| movement_agility | 91 | 77 | 86 | 96 | 79 | 67 |
| movement_reactions | 94 | 93 | 94 | 89 | 91 | 88 |
| movement_balance | 95 | 82 | 74 | 84 | 78 | 49 |
| power_shot_power | 86 | 90 | 94 | 80 | 91 | 59 |
| power_jumping | 68 | 85 | 95 | 64 | 63 | 78 |
| power_stamina | 72 | 76 | 77 | 81 | 89 | 41 |
| power_strength | 69 | 86 | 77 | 53 | 74 | 78 |
| power_long_shots | 94 | 87 | 93 | 81 | 91 | 12 |
| mentality_aggression | 44 | 81 | 63 | 63 | 76 | 34 |
| mentality_interceptions | 40 | 49 | 29 | 37 | 66 | 19 |
| mentality_positioning | 93 | 95 | 95 | 86 | 88 | 11 |
| mentality_vision | 95 | 81 | 76 | 90 | 94 | 65 |
| mentality_penalties | 75 | 90 | 88 | 93 | 83 | 11 |
| mentality_composure | 96 | 88 | 95 | 93 | 89 | 68 |
| defending_marking_awareness | 20 | 35 | 24 | 35 | 68 | 27 |
| defending_standing_tackle | 35 | 42 | 32 | 32 | 65 | 12 |
| defending_sliding_tackle | 24 | 19 | 24 | 29 | 53 | 18 |
| goalkeeping_diving | 6 | 15 | 7 | 9 | 15 | 87 |
| goalkeeping_handling | 11 | 6 | 11 | 9 | 13 | 92 |
| goalkeeping_kicking | 15 | 12 | 15 | 15 | 5 | 78 |
| goalkeeping_positioning | 14 | 8 | 14 | 15 | 10 | 90 |
| goalkeeping_reflexes | 8 | 10 | 11 | 11 | 13 | 90 |
| goalkeeping_speed | 0 | 0 | 0 | 0 | 0 | 50 |
| player_face_url | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/players/020/801/22_120.png | https://cdn.sofifa.net/players/190/871/22_120.png | https://cdn.sofifa.net/players/192/985/22_120.png | https://cdn.sofifa.net/players/200/389/22_120.png |
| club_logo_url | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/teams/11/60.png | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/teams/10/60.png | https://cdn.sofifa.net/teams/240/60.png |
| club_flag_url | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/flags/gb-eng.png | https://cdn.sofifa.net/flags/es.png |
| nation_logo_url | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/teams/1354/60.png | https://cdn.sofifa.net/teams/1325/60.png | ||
| nation_flag_url | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/pl.png | https://cdn.sofifa.net/flags/pt.png | https://cdn.sofifa.net/flags/br.png | https://cdn.sofifa.net/flags/be.png | https://cdn.sofifa.net/flags/si.png |
work_rate, player_tags, and player_traits are stacked columns. This means that each of these columns may hold more than one value per player.
# Unstack work_rate: split the column on "/" into its two parts and give the
# resulting work_rate_1 / work_rate_2 columns descriptive names.
df <- cSplit(df, "work_rate", sep = "/", type.convert = FALSE) %>%
  rename(
    work_rate_att = work_rate_1,
    work_rate_dff = work_rate_2
  )
# Preview the first three players, transposed so that every column is a row.
df %>%
  head(3) %>%
  t() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| sofifa_id | 158023 | 188545 | 20801 |
| short_name | L. Messi | R. Lewandowski | Cristiano Ronaldo |
| player_positions | RW, ST, CF | ST | ST, LW |
| overall | 93 | 92 | 91 |
| potential | 93 | 92 | 91 |
| value_eur | 78000000 | 119500000 | 45000000 |
| wage_eur | 320000 | 270000 | 270000 |
| age | 34 | 32 | 36 |
| dob | 1987-06-24 | 1988-08-21 | 1985-02-05 |
| height_cm | 170 | 185 | 187 |
| weight_kg | 72 | 81 | 83 |
| club_team_id | 73 | 21 | 11 |
| club_name | Paris Saint-Germain | FC Bayern München | Manchester United |
| league_name | French Ligue 1 | German 1. Bundesliga | English Premier League |
| league_level | 1 | 1 | 1 |
| club_position | RW | ST | ST |
| club_jersey_number | 30 | 9 | 7 |
| club_loaned_from | |||
| club_joined | 2021-08-10 | 2014-07-01 | 2021-08-27 |
| club_contract_valid_until | 2023 | 2023 | 2023 |
| nationality_id | 52 | 37 | 38 |
| nationality_name | Argentina | Poland | Portugal |
| nation_team_id | 1369 | 1353 | 1354 |
| nation_position | RW | RS | ST |
| nation_jersey_number | 10 | 9 | 7 |
| preferred_foot | Left | Right | Right |
| weak_foot | 4 | 4 | 4 |
| skill_moves | 4 | 4 | 5 |
| international_reputation | 5 | 5 | 5 |
| body_type | Unique | Unique | Unique |
| release_clause_eur | 144300000 | 197200000 | 83300000 |
| player_tags | #Dribbler, #Distance Shooter, #FK Specialist, #Acrobat, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Distance Shooter, #Clinical Finisher, #Complete Forward | #Aerial Threat, #Dribbler, #Distance Shooter, #Crosser, #Acrobat, #Clinical Finisher, #Complete Forward |
| player_traits | Finesse Shot, Long Shot Taker (AI), Playmaker (AI), Outside Foot Shot, One Club Player, Chip Shot (AI), Technical Dribbler (AI) | Solid Player, Finesse Shot, Outside Foot Shot, Chip Shot (AI) | Power Free-Kick, Flair, Long Shot Taker (AI), Speed Dribbler (AI), Outside Foot Shot |
| pace | 85 | 78 | 87 |
| shooting | 92 | 92 | 94 |
| passing | 91 | 79 | 80 |
| dribbling | 95 | 86 | 88 |
| defending | 34 | 44 | 34 |
| physic | 65 | 82 | 75 |
| attacking_crossing | 85 | 71 | 87 |
| attacking_finishing | 95 | 95 | 95 |
| attacking_heading_accuracy | 70 | 90 | 90 |
| attacking_short_passing | 91 | 85 | 80 |
| attacking_volleys | 88 | 89 | 86 |
| skill_dribbling | 96 | 85 | 88 |
| skill_curve | 93 | 79 | 81 |
| skill_fk_accuracy | 94 | 85 | 84 |
| skill_long_passing | 91 | 70 | 77 |
| skill_ball_control | 96 | 88 | 88 |
| movement_acceleration | 91 | 77 | 85 |
| movement_sprint_speed | 80 | 79 | 88 |
| movement_agility | 91 | 77 | 86 |
| movement_reactions | 94 | 93 | 94 |
| movement_balance | 95 | 82 | 74 |
| power_shot_power | 86 | 90 | 94 |
| power_jumping | 68 | 85 | 95 |
| power_stamina | 72 | 76 | 77 |
| power_strength | 69 | 86 | 77 |
| power_long_shots | 94 | 87 | 93 |
| mentality_aggression | 44 | 81 | 63 |
| mentality_interceptions | 40 | 49 | 29 |
| mentality_positioning | 93 | 95 | 95 |
| mentality_vision | 95 | 81 | 76 |
| mentality_penalties | 75 | 90 | 88 |
| mentality_composure | 96 | 88 | 95 |
| defending_marking_awareness | 20 | 35 | 24 |
| defending_standing_tackle | 35 | 42 | 32 |
| defending_sliding_tackle | 24 | 19 | 24 |
| goalkeeping_diving | 6 | 15 | 7 |
| goalkeeping_handling | 11 | 6 | 11 |
| goalkeeping_kicking | 15 | 12 | 15 |
| goalkeeping_positioning | 14 | 8 | 14 |
| goalkeeping_reflexes | 8 | 10 | 11 |
| goalkeeping_speed | 0 | 0 | 0 |
| player_face_url | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/players/020/801/22_120.png |
| club_logo_url | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/teams/11/60.png |
| club_flag_url | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/flags/gb-eng.png |
| nation_logo_url | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/teams/1354/60.png |
| nation_flag_url | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/pl.png | https://cdn.sofifa.net/flags/pt.png |
| work_rate_att | Medium | High | High |
| work_rate_dff | Low | Medium | Low |
For features like player_tags and player_traits, we need to one-hot encode them.
# Encode a list of label vectors as indicator columns.
#
# x: a list with one character vector of labels per row (for example the
#    result of strsplit()).
# Returns a data frame with one row per element of `x` and one integer column
# per distinct label. Each cell holds how often the label occurs in that
# element; labels that are absent are filled with 0L.
one_hot <- function(x) {
  x %>%
    # table() turns each label vector into named counts ...
    map(table) %>%
    # ... which bind_rows() stacks into one row per element, leaving NA where
    # a label does not occur.
    bind_rows() %>%
    mutate(across(everything(), ~ replace_na(as.integer(.x), 0L)))
}
# Normalise the stacked tag / trait strings before one-hot encoding them.
# Labels that differ only by surrounding whitespace would otherwise become
# separate columns that look identical once rendered, and stripping "(AI)"
# alone would leave a trailing blank in the trait name.
df$player_traits <- str_remove_all(df$player_traits, "\\s*\\(AI\\)")
df$player_tags <- str_trim(df$player_tags)
df$player_traits <- str_trim(df$player_traits)

# Players without any tag / trait get an explicit placeholder label so that
# every row still produces exactly one encoded row.
df$player_tags[is.na(df$player_tags) | df$player_tags == ""] <- "#No_Tags"
df$player_traits[is.na(df$player_traits) | df$player_traits == ""] <- "No Traits"

# Split on commas, tolerating any whitespace around them, then replace the two
# stacked columns with their indicator columns.
df %<>%
  mutate(one_hot(str_split(player_tags, "\\s*,\\s*"))) %>%
  mutate(one_hot(str_split(player_traits, "\\s*,\\s*"))) %>%
  select(-player_tags, -player_traits)

# Preview the first three players, transposed so that every column is a row.
df %>%
  head(3) %>%
  t() %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| sofifa_id | 158023 | 188545 | 20801 |
| short_name | L. Messi | R. Lewandowski | Cristiano Ronaldo |
| player_positions | RW, ST, CF | ST | ST, LW |
| overall | 93 | 92 | 91 |
| potential | 93 | 92 | 91 |
| value_eur | 78000000 | 119500000 | 45000000 |
| wage_eur | 320000 | 270000 | 270000 |
| age | 34 | 32 | 36 |
| dob | 1987-06-24 | 1988-08-21 | 1985-02-05 |
| height_cm | 170 | 185 | 187 |
| weight_kg | 72 | 81 | 83 |
| club_team_id | 73 | 21 | 11 |
| club_name | Paris Saint-Germain | FC Bayern München | Manchester United |
| league_name | French Ligue 1 | German 1. Bundesliga | English Premier League |
| league_level | 1 | 1 | 1 |
| club_position | RW | ST | ST |
| club_jersey_number | 30 | 9 | 7 |
| club_loaned_from | |||
| club_joined | 2021-08-10 | 2014-07-01 | 2021-08-27 |
| club_contract_valid_until | 2023 | 2023 | 2023 |
| nationality_id | 52 | 37 | 38 |
| nationality_name | Argentina | Poland | Portugal |
| nation_team_id | 1369 | 1353 | 1354 |
| nation_position | RW | RS | ST |
| nation_jersey_number | 10 | 9 | 7 |
| preferred_foot | Left | Right | Right |
| weak_foot | 4 | 4 | 4 |
| skill_moves | 4 | 4 | 5 |
| international_reputation | 5 | 5 | 5 |
| body_type | Unique | Unique | Unique |
| release_clause_eur | 144300000 | 197200000 | 83300000 |
| pace | 85 | 78 | 87 |
| shooting | 92 | 92 | 94 |
| passing | 91 | 79 | 80 |
| dribbling | 95 | 86 | 88 |
| defending | 34 | 44 | 34 |
| physic | 65 | 82 | 75 |
| attacking_crossing | 85 | 71 | 87 |
| attacking_finishing | 95 | 95 | 95 |
| attacking_heading_accuracy | 70 | 90 | 90 |
| attacking_short_passing | 91 | 85 | 80 |
| attacking_volleys | 88 | 89 | 86 |
| skill_dribbling | 96 | 85 | 88 |
| skill_curve | 93 | 79 | 81 |
| skill_fk_accuracy | 94 | 85 | 84 |
| skill_long_passing | 91 | 70 | 77 |
| skill_ball_control | 96 | 88 | 88 |
| movement_acceleration | 91 | 77 | 85 |
| movement_sprint_speed | 80 | 79 | 88 |
| movement_agility | 91 | 77 | 86 |
| movement_reactions | 94 | 93 | 94 |
| movement_balance | 95 | 82 | 74 |
| power_shot_power | 86 | 90 | 94 |
| power_jumping | 68 | 85 | 95 |
| power_stamina | 72 | 76 | 77 |
| power_strength | 69 | 86 | 77 |
| power_long_shots | 94 | 87 | 93 |
| mentality_aggression | 44 | 81 | 63 |
| mentality_interceptions | 40 | 49 | 29 |
| mentality_positioning | 93 | 95 | 95 |
| mentality_vision | 95 | 81 | 76 |
| mentality_penalties | 75 | 90 | 88 |
| mentality_composure | 96 | 88 | 95 |
| defending_marking_awareness | 20 | 35 | 24 |
| defending_standing_tackle | 35 | 42 | 32 |
| defending_sliding_tackle | 24 | 19 | 24 |
| goalkeeping_diving | 6 | 15 | 7 |
| goalkeeping_handling | 11 | 6 | 11 |
| goalkeeping_kicking | 15 | 12 | 15 |
| goalkeeping_positioning | 14 | 8 | 14 |
| goalkeeping_reflexes | 8 | 10 | 11 |
| goalkeeping_speed | 0 | 0 | 0 |
| player_face_url | https://cdn.sofifa.net/players/158/023/22_120.png | https://cdn.sofifa.net/players/188/545/22_120.png | https://cdn.sofifa.net/players/020/801/22_120.png |
| club_logo_url | https://cdn.sofifa.net/teams/73/60.png | https://cdn.sofifa.net/teams/21/60.png | https://cdn.sofifa.net/teams/11/60.png |
| club_flag_url | https://cdn.sofifa.net/flags/fr.png | https://cdn.sofifa.net/flags/de.png | https://cdn.sofifa.net/flags/gb-eng.png |
| nation_logo_url | https://cdn.sofifa.net/teams/1369/60.png | https://cdn.sofifa.net/teams/1353/60.png | https://cdn.sofifa.net/teams/1354/60.png |
| nation_flag_url | https://cdn.sofifa.net/flags/ar.png | https://cdn.sofifa.net/flags/pl.png | https://cdn.sofifa.net/flags/pt.png |
| work_rate_att | Medium | High | High |
| work_rate_dff | Low | Medium | Low |
| #Acrobat | 1 | 0 | 1 |
| #Clinical Finisher | 1 | 1 | 1 |
| #Complete Forward | 1 | 1 | 1 |
| #Distance Shooter | 1 | 1 | 1 |
| #Dribbler | 1 | 0 | 1 |
| #FK Specialist | 1 | 0 | 0 |
| #Aerial Threat | 0 | 1 | 1 |
| #Crosser | 0 | 0 | 1 |
| #Complete Midfielder | 0 | 0 | 0 |
| #Playmaker | 0 | 0 | 0 |
| #Speedster | 0 | 0 | 0 |
| #Engine | 0 | 0 | 0 |
| #No_Tags | 0 | 0 | 0 |
| #Tackling | 0 | 0 | 0 |
| #Tactician | 0 | 0 | 0 |
| #Poacher | 0 | 0 | 0 |
| #Complete Defender | 0 | 0 | 0 |
| #Strength | 0 | 0 | 0 |
| #Tactician | 0 | 0 | 0 |
| #Tackling | 0 | 0 | 0 |
| #Playmaker | 0 | 0 | 0 |
| Chip Shot | 1 | 1 | 0 |
| Finesse Shot | 1 | 1 | 0 |
| Long Shot Taker | 1 | 0 | 1 |
| One Club Player | 1 | 0 | 0 |
| Outside Foot Shot | 1 | 1 | 1 |
| Playmaker | 1 | 0 | 0 |
| Technical Dribbler | 1 | 0 | 0 |
| Solid Player | 0 | 1 | 0 |
| Flair | 0 | 0 | 1 |
| Power Free-Kick | 0 | 0 | 1 |
| Speed Dribbler | 0 | 0 | 1 |
| Injury Prone | 0 | 0 | 0 |
| Early Crosser | 0 | 0 | 0 |
| Leadership | 0 | 0 | 0 |
| Long Passer | 0 | 0 | 0 |
| Comes For Crosses | 0 | 0 | 0 |
| GK Long Throw | 0 | 0 | 0 |
| Rushes Out Of Goal | 0 | 0 | 0 |
| Saves with Feet | 0 | 0 | 0 |
| Team Player | 0 | 0 | 0 |
| Dives Into Tackles | 0 | 0 | 0 |
| Power Header | 0 | 0 | 0 |
| Cautious With Crosses | 0 | 0 | 0 |
| Long Throw-in | 0 | 0 | 0 |
| No Traits | 0 | 0 | 0 |
| Giant Throw-in | 0 | 0 | 0 |
One player can play multiple positions. However, as we can see, the first position listed in player_positions is their main position. Therefore, we are going to make it their only position.
# Keep only the first listed position of every player. vapply() guarantees a
# character vector even for empty input, which sapply() does not.
df$player_positions <- vapply(
  strsplit(df$player_positions, ", "),
  `[`,
  character(1),
  1
)

# Position codes that make up the defender and midfielder classes; every
# position that is neither GK nor listed here is classed as a forward.
defence <- c("CB", "RB", "LB", "LWB", "RWB", "LCB", "RCB")
midfielder <- c(
  "CM", "CDM", "CAM", "LM", "RM", "LAM", "RAM", "LCM", "RCM", "LDM", "RDM"
)

df %<>% mutate(Class = case_when(
  player_positions %in% "GK" ~ "GK",
  player_positions %in% defence ~ "DEF",
  player_positions %in% midfielder ~ "MID",
  TRUE ~ "FWD"
))

# The lookup vectors are only needed for the classification above.
rm(defence, midfielder)
Now we have finished cleaning our data and can begin our analysis.
# save df into .Rdata format
# The object is stored under its own name, so load() on this file restores a
# variable called `df`.
save(df, file = "../output/fifa_22_tidydata_cleaned.Rdata")
# List the output directory so the written file shows up in the report.
list.files(path = "../output")
## [1] "FIFA_22_Analysis_files" "FIFA_22_Analysis.html"
## [3] "fifa_22_tidydata_cleaned.Rdata" "fifa_22_tidydata_raw.Rdata"
# Restore `df` from the cleaned data file, then show the ten nationalities
# with the most players.
load("../output/fifa_22_tidydata_cleaned.Rdata")
df %>%
  group_by(nationality_name) %>%
  summarise(n_player = n()) %>%
  arrange(-n_player) %>%
  head(10) %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| nationality_name | n_player |
|---|---|
| England | 1719 |
| Germany | 1214 |
| Spain | 1086 |
| France | 980 |
| Argentina | 960 |
| Brazil | 897 |
| Japan | 546 |
| Netherlands | 439 |
| United States | 413 |
| Poland | 403 |
# Plot size for notebook front-ends (the option is `repr.plot.height`; the
# misspelt name used before was silently ignored).
options(repr.plot.width = 12, repr.plot.height = 8)

# Polygon data of the world map; `region` holds the country name.
world_map <- map_data("world")

# Attach the number of players per country to every map polygon. Nationality
# names are first translated to the region names used by the map data:
# the four home nations all belong to the "UK" polygon and "United States"
# is called "USA". Names without a matching region end up as NA on the map.
numofplayers <- world_map %>%
  mutate(region = as.character(region)) %>%
  left_join(
    df %>%
      mutate(region = case_when(
        nationality_name %in%
          c("England", "Scotland", "Wales", "Northern Ireland") ~ "UK",
        nationality_name %in% "United States" ~ "USA",
        TRUE ~ as.character(nationality_name)
      )) %>%
      count(region, name = "Number of Player"),
    by = "region"
  )

# Interactive choropleth: one polygon group per `group`, filled by player count.
ggplotly(
  ggplot(numofplayers, aes(long, lat, group = group)) +
    geom_polygon(
      aes(fill = `Number of Player`),
      color = "white",
      show.legend = FALSE
    ) +
    scale_fill_viridis_c(option = "C") +
    theme_fivethirtyeight() +
    labs(
      fill = "Number of Player",
      title = "Number of Player From Around the World"
    )
)
There are 163 countries in the database, and European countries have the most players. England has the most players in the game with 1719, followed by Germany and Spain with 1214 and 1086 players respectively.
# Number of distinct clubs. Missing club ids (players without a club) are
# excluded; unique() alone would count NA as one extra club.
n_clubs <- n_distinct(df$club_team_id, na.rm = TRUE)
paste0("Total number of clubs :", n_clubs)
## [1] "Total number of clubs :702"
# One-row data frames holding the first player with the highest potential and
# the first player with the highest overall rating.
max_pa <- df %>%
  slice(which.max(potential))
max_ca <- df %>%
  slice(which.max(overall))
cat(
  "Best Player\n--------------------------------------------\n",
  paste0("Maximum Potentia :", max_pa$short_name),
  "\n",
  paste0("Maximum Overall Perforamnce :", max_ca$short_name)
)
## Best Player
## --------------------------------------------
## Maximum Potentia :K. Mbappé
## Maximum Overall Perforamnce :L. Messi
# Attribute columns for which the best player is looked up.
atts_list <- c(
  "pace", "shooting", "passing", "dribbling", "defending", "physic",
  "attacking_crossing", "attacking_finishing", "attacking_heading_accuracy",
  "attacking_short_passing", "attacking_volleys",
  "skill_dribbling", "skill_curve", "skill_fk_accuracy",
  "skill_long_passing", "skill_ball_control",
  "movement_acceleration", "movement_sprint_speed", "movement_agility",
  "movement_reactions", "movement_balance",
  "power_shot_power", "power_jumping", "power_stamina", "power_strength",
  "power_long_shots",
  "mentality_aggression", "mentality_interceptions", "mentality_positioning",
  "mentality_vision", "mentality_penalties", "mentality_composure",
  "defending_marking_awareness", "defending_standing_tackle",
  "defending_sliding_tackle",
  "goalkeeping_diving", "goalkeeping_handling", "goalkeeping_kicking",
  "goalkeeping_positioning", "goalkeeping_reflexes", "goalkeeping_speed"
)
atts <- select(df, short_name, all_of(atts_list))

# Long format: one row per (player, attribute) pair.
bestof <- melt(atts, id.vars = "short_name", variable.name = "Attribute")
# Per attribute keep the first row holding the maximum value, then reduce the
# result to the attribute and the player's name.
bestof <- bestof[, .SD[which.max(value)], by = Attribute]
bestof <- bestof[, c("Attribute", "short_name")]

bestof %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| Attribute | short_name |
|---|---|
| pace | K. Mbappé |
| shooting | Cristiano Ronaldo |
| passing | K. De Bruyne |
| dribbling | L. Messi |
| defending | V. van Dijk |
| physic | Casemiro |
| attacking_crossing | K. De Bruyne |
| attacking_finishing | L. Messi |
| attacking_heading_accuracy | L. de Jong |
| attacking_short_passing | K. De Bruyne |
| attacking_volleys | L. Suárez |
| skill_dribbling | L. Messi |
| skill_curve | Quaresma |
| skill_fk_accuracy | L. Messi |
| skill_long_passing | K. De Bruyne |
| skill_ball_control | L. Messi |
| movement_acceleration | K. Mbappé |
| movement_sprint_speed | K. Mbappé |
| movement_agility | Neymar Jr |
| movement_reactions | L. Messi |
| movement_balance | R. Fraser |
| power_shot_power | A. Kolarov |
| power_jumping | Cristiano Ronaldo |
| power_stamina | N. Kanté |
| power_strength | A. Akinfenwa |
| power_long_shots | L. Messi |
| mentality_aggression | B. Pearson |
| mentality_interceptions | N. Kanté |
| mentality_positioning | T. Müller |
| mentality_vision | L. Messi |
| mentality_penalties | Neymar Jr |
| mentality_composure | L. Messi |
| defending_marking_awareness | G. Chiellini |
| defending_standing_tackle | N. Kanté |
| defending_sliding_tackle | A. Wan-Bissaka |
| goalkeeping_diving | G. Donnarumma |
| goalkeeping_handling | J. Oblak |
| goalkeeping_kicking | Ederson |
| goalkeeping_positioning | S. Handanovič |
| goalkeeping_reflexes | J. Oblak |
| goalkeeping_speed | Jordi Masip |
# One-row data frames holding the first player with the highest market value
# and the first player with the highest wage.
max_value <- df %>%
  slice(which.max(value_eur))
max_wage <- df %>%
  slice(which.max(wage_eur))
cat(
  "Top Earners\n----------------------------------\n",
  paste0("Maximum Value :", max_value$short_name),
  "\n",
  paste0("Maximum Wages :", max_wage$short_name)
)
## Top Earners
## ----------------------------------
## Maximum Value :K. Mbappé
## Maximum Wages :K. De Bruyne
# Number of players per preferred foot and their share in whole percent.
foots <- df %>%
  group_by(preferred_foot) %>%
  summarise(n_player = n()) %>%
  mutate(p_player = round(100 * (n_player / sum(n_player))))

# Waffle chart of the percentages (10 rows, one square per percent). The
# subtitle takes the left-footed share from `foots` instead of a hard-coded
# number, so it cannot drift away from the data.
ggplot(foots, aes(fill = preferred_foot, values = p_player)) +
  geom_waffle(n_rows = 10, colour = "White") +
  scale_fill_manual(
    name = NULL,
    values = c("grey50", "grey"),
    labels = c("Left", "Right")
  ) +
  ggtitle(
    "Left vs Right foot",
    subtitle = paste(
      "There are",
      foots$p_player[foots$preferred_foot == "Left"],
      "lefty out of every 100 players"
    )
  ) +
  coord_equal() +
  theme_fivethirtyeight() +
  theme(
    panel.grid.major = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_blank()
  )
Player ratings are normally distributed in FIFA 22, with a mean of 65.77 and a standard deviation of 6.88.
# Histogram of the overall rating. Ratings are whole numbers, so one bin per
# rating point is used; the default of 30 bins makes neighbouring bins cover
# one or two rating values alternately and distorts the shape.
df %>%
  ggplot(aes(x = overall)) +
  geom_histogram(binwidth = 1, color = "white", fill = "darkgrey") +
  ggtitle(
    "Player Ratings Are Normally Distributed",
    subtitle = "The mean can be used as a measure of central tendancy"
  ) +
  theme_fivethirtyeight() +
  theme(axis.text.y = element_blank())
Now we can check the distribution of each league.
# First-level leagues whose average overall rating is at least 70.
top_league <- df %>%
  filter(league_level == 1) %>%
  group_by(league_name) %>%
  summarise(avg_rating = mean(overall)) %>%
  filter(avg_rating >= 70) %>%
  select(league_name)

# All players of those leagues ...
top_league_player <- df %>%
  filter(league_name %in% top_league$league_name)

# ... and the per-league summary computed from exactly the same rows.
summ <- top_league_player %>%
  group_by(league_name) %>%
  summarise(
    avg_rating = mean(overall),
    n_player = n(),
    avg_age = mean(age)
  )

# Plot size for notebook front-ends (the option is `repr.plot.height`; the
# misspelt name used before was silently ignored).
options(repr.plot.width = 12, repr.plot.height = 8)

# One histogram per league with the league average as a red line and a label.
ggplotly(
  ggplot() +
    geom_histogram(
      top_league_player,
      mapping = aes(overall, fill = league_name),
      color = "white",
      fill = "darkgrey"
    ) +
    geom_vline(
      summ,
      mapping = aes(xintercept = avg_rating),
      size = 0.5,
      color = "red"
    ) +
    geom_text(
      summ,
      mapping = aes(
        x = avg_rating - 5,
        y = 100,
        label = round(avg_rating, digits = 2)
      )
    ) +
    facet_wrap(league_name ~ .) +
    theme_fivethirtyeight() +
    theme(legend.position = "bottom", axis.text.y = element_blank()) +
    labs(
      y = element_blank(),
      x = "Overall Rating",
      title = "Rating Distribution of Player in Top Leagues",
      subtitle = "Overall rating distribution and average overall rating of all top leagues"
    )
)
The average overall rating of each league is closely related to how skilled the players in that league are: the higher the average overall rating, the higher the skill. Apparently, players in the Campeonato Brasileiro Série A (Brazil), the Czech Republic Gambrinus Liga (Czech Republic), and the Ukrainian Premier League (Ukraine) are as skilled as players in the German 1. Bundesliga (Germany) or the French Ligue 1 (France), and players in the Spain Primera Division (Spain) are the most skilled.
# Mean potential and overall rating per age for all non-goalkeepers; only the
# overall rating is drawn.
df %>%
  filter(player_positions != "GK") %>%
  group_by(age) %>%
  summarise(
    Potential = mean(potential),
    Overall = mean(overall)
  ) %>%
  ggplot(aes(x = age, y = Overall)) +
  geom_line(color = "grey50", size = 1) +
  ggtitle(
    "Average rating is flaten over the years",
    subtitle = "Player ratings tend not to get better after the age of 30"
  ) +
  theme_fivethirtyeight()
As we can see, the average overall rating flattens out with age and drops sharply after 40.
# Mean overall rating per age within each position class (goalkeepers
# excluded), drawn as one stacked panel per class for ages 15 to 45.
df %>%
  filter(Class != "GK") %>%
  group_by(Class, age) %>%
  summarise(Rating = mean(overall)) %>%
  ggplot(aes(x = age, y = Rating, group = Class)) +
  geom_line(size = 1, color = "grey50") +
  xlim(15, 45) +
  ggtitle("Rating over the ages by position class") +
  theme_fivethirtyeight() +
  facet_wrap(~Class, ncol = 1) +
  theme(
    strip.background = element_rect(fill = "darkgrey"),
    strip.text = element_text(color = "white", face = "bold")
  )
When this relationship is explored by the major position groups, we can see that defender ratings tend to begin their decline earliest, at around 33 years of age, while the decline starts somewhere closer to 35 for both attackers and midfielders.
# Mean potential (dashed purple) and mean overall rating (solid grey) per age,
# with a text annotation near the point where the two curves meet.
df %>%
  group_by(age) %>%
  summarise(
    Potential = mean(potential),
    Overall = mean(overall)
  ) %>%
  ggplot(aes(x = age)) +
  geom_line(
    aes(y = Potential),
    color = "purple",
    size = 1,
    linetype = "dashed"
  ) +
  geom_line(aes(y = Overall), color = "grey50", size = 1) +
  annotate(
    "text",
    x = 30,
    y = 73,
    label = "Potential meets overall\ntalent at 29 years old",
    color = "grey50"
  ) +
  ggtitle(
    "Potential And Overall Talent Converges",
    subtitle = "The average ratings were taken for each age"
  ) +
  theme_fivethirtyeight()
It appears that player potential and player overall rating converge at around 29 years of age.
# Names of all goalkeeping columns; they are left out of the correlation
# because goalkeepers themselves are filtered out below.
gk_vars <- df %>% select(contains("goalkeeping")) %>% names()

# Numeric matrix of the non-goalkeepers, built once and shared by both
# correlation methods. Identifier-like columns are dropped and rows with any
# missing value are removed.
non_gk_numeric <- df %>%
  filter(player_positions != "GK") %>%
  select(where(is.numeric)) %>%
  select(
    -club_team_id, -league_level, -club_jersey_number,
    -club_contract_valid_until, -nationality_id, -nation_team_id,
    -nation_jersey_number, -all_of(gk_vars)
  ) %>%
  as.matrix() %>%
  na.omit()

spearman_cor_overall <- cor(non_gk_numeric, method = "spearman")
pearson_cor_overall <- cor(non_gk_numeric)
cor_colnames <- colnames(spearman_cor_overall)

# Keep only the correlations with "overall". The column is picked by name so
# the result does not depend on the column order of `df`. data.frame() names
# the single value column ".", which the sorting and the join below rely on.
spearman_cor_overall <- spearman_cor_overall[, "overall"] %>% data.frame()
spearman_cor_overall <- cbind(cor_colnames, spearman_cor_overall) %>%
  arrange(desc(`.`))
pearson_cor_overall <- pearson_cor_overall[, "overall"] %>% data.frame()
pearson_cor_overall <- cbind(cor_colnames, pearson_cor_overall) %>%
  arrange(desc(`.`))

# Ten features with the highest Spearman correlation, with Pearson alongside.
# The join suffixes the two "." columns to "..x" and "..y".
spearman_cor_overall %>%
  left_join(pearson_cor_overall, by = "cor_colnames") %>%
  rename(Feature = cor_colnames, Spearman = `..x`, Pearson = `..y`) %>%
  filter(Feature != "overall") %>%
  head(10) %>%
  kable("html") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    font_size = 8
  ) %>%
  scroll_box(width = "100%")
| Feature | Spearman | Pearson |
|---|---|---|
| value_eur | 0.8815896 | 0.5627151 |
| movement_reactions | 0.8721298 | 0.8755071 |
| mentality_composure | 0.7967710 | 0.8103614 |
| attacking_short_passing | 0.7694905 | 0.7799224 |
| skill_ball_control | 0.7675370 | 0.7636762 |
| wage_eur | 0.7624437 | 0.6056264 |
| release_clause_eur | 0.7228825 | 0.5366370 |
| passing | 0.6959918 | 0.7150010 |
| dribbling | 0.6790794 | 0.6664023 |
| skill_dribbling | 0.6149346 | 0.5723942 |
These are the 10 attributes most highly correlated with overall.
# Detailed attribute columns plus position and class, one row per player.
# Only players with an overall rating above 75 are kept, which is what the
# plot subtitle states; previously no such filter was applied.
# The category prefix is stripped from the column names so the axis labels
# stay short.
tile_data <- df %>%
  filter(overall > 75) %>%
  select(
    starts_with("attacking_"), starts_with("skill_"),
    starts_with("movement_"), starts_with("power_"),
    starts_with("mentality_"), starts_with("defending_"),
    player_positions, Class
  ) %>%
  # skill_moves matches the "skill_" prefix but is not one of the attributes
  # shown in the heatmap.
  select(-skill_moves) %>%
  rename_with(
    ~ str_remove(.x, "^(attacking|skill|movement|power|mentality|defending)_")
  )

# Median of every attribute per position, drawn as a labelled heatmap with one
# panel per position class. `tile_data` itself is left untouched.
tile_data %>%
  filter(player_positions != "GK") %>%
  as_tibble() %>%
  pivot_longer(
    cols = -c(player_positions, Class),
    names_to = "Attribute",
    values_to = "Value"
  ) %>%
  group_by(Class, player_positions, Attribute) %>%
  summarise(MedianValue = median(Value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = Attribute, y = player_positions)) +
  geom_tile(aes(fill = MedianValue), colour = "black") +
  geom_text(aes(label = MedianValue)) +
  scale_fill_gradient(low = "purple", high = "green") +
  ggtitle(
    "Defender are strong, Foward are agile",
    subtitle = "Analysing the median ratings for each of the attributes for each postion for player with and overall rating over 75"
  ) +
  theme_fivethirtyeight() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(face = "bold", size = 12),
    legend.position = "none"
  ) +
  facet_wrap(~Class, scales = "free", ncol = 1)
# Names of the 20 clubs with the highest mean overall rating, best first.
# Players without a club name are ignored so they cannot form a "club".
Top_20_clubs <- df %>%
  filter(!is.na(club_name), club_name != "") %>%
  group_by(club_name) %>%
  summarise(AverageRating = mean(overall, na.rm = TRUE)) %>%
  arrange(desc(AverageRating)) %>%
  head(n = 20) %>%
  pull(club_name)

# Box plot of the player ratings of those clubs, ordered by mean rating. The
# three highlighted clubs and the club named in the title are taken from the
# ranking itself instead of being hard-coded.
df %>%
  filter(club_name %in% Top_20_clubs) %>%
  mutate(
    Top3 = if_else(club_name %in% head(Top_20_clubs, 3), "Yes", "No")
  ) %>%
  ggplot(aes(x = reorder(club_name, overall), y = overall, fill = Top3)) +
  geom_boxplot(color = "black") +
  scale_fill_manual(values = c("lightgrey", "purple")) +
  ggtitle(
    paste(Top_20_clubs[1], "has the Hightest Overall"),
    subtitle = "The average overall rating of the 20 highest rated teams in the game, sorted in decending order"
  ) +
  coord_flip() +
  theme_fivethirtyeight() +
  theme(legend.position = "none")